DAGMM with an LSTM autoencoder for Anomaly Detection in Time Series Data¶

In [ ]:
import os
import time

# Disable the oneDNN custom ops *before* TensorFlow is imported: the flag is
# read while the library is being loaded, so assigning it after
# `import tensorflow` is too late to have any effect.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

# Project-local helpers. The star imports are kept as-is because the rest of
# the notebook relies on the names they export (get_dataframes,
# get_train_test_data, get_collisions, get_statistics, plot_* ...).
from dataset import *
from plots import *
from metrics import *
from models_funtions import *

# Set style for matplotlib
plt.style.use("Solarize_Light2")

# Imported after the star imports so that `pio` always refers to plotly.io.
import plotly.io as pio
pio.renderers.default = "notebook_connected"
In [ ]:
import os

# Root directories of the dataset: one for the normal recordings and one for
# the recordings that contain collisions.
ROOTDIR_DATASET_NORMAL, ROOTDIR_DATASET_ANOMALY = '../dataset/normal', '../dataset/collisions'

# TF_ENABLE_ONEDNN_OPTS=0 asks TensorFlow not to use the oneDNN library for optimization.
# NOTE(review): `tensorflow` is imported in the first cell of the notebook;
# confirm that assigning the flag at this point still takes effect.
os.environ.update({'TF_ENABLE_ONEDNN_OPTS': '0'})

Various parameters¶

In [ ]:
# Value passed as `freq` to the loading and statistics helpers below.
# Alternative values kept for reference: '1.0', '0.1', '0.01'.
freq = '0.005'

# File-name fragments identifying the normal and the collision recordings.
file_name_normal = "_20220811_rbtc_"
file_name_collisions = "_collision_20220811_rbtc_"

# Recording indices belonging to each group.
recording_normal = [0, 2, 3, 4]
recording_collisions = [1, 5]

# The dot in `freq` is swapped for an underscore so the value can be embedded
# in a folder name (e.g. '0.005' -> '0_005').
freq_str = "_".join(freq.split("."))
features_folder_normal = "./features/normal" + freq_str + "/"
features_folder_collisions = "./features/collisions" + freq_str + "/"

Data¶

In [ ]:
# Normal recordings: the third value returned by get_dataframes is discarded.
df_features_normal, df_normal_raw, _ = get_dataframes(
    ROOTDIR_DATASET_NORMAL,
    file_name_normal,
    recording_normal,
    freq,
    features_folder_normal,
)

# Collision recordings, loaded three times with separate feature folders:
# both recordings together ("1_5/"), then recording 1 and recording 5 alone.
df_features_collisions, df_collisions_raw, df_collisions_raw_action = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    recording_collisions,
    freq,
    features_folder_collisions + "1_5/",
)
df_features_collisions_1, df_collisions_raw_1, df_collisions_raw_action_1 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    [1],
    freq,
    features_folder_collisions + "1/",
)
df_features_collisions_5, df_collisions_raw_5, df_collisions_raw_action_5 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    [5],
    freq,
    features_folder_collisions + "5/",
)
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.05362510681152344 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.03210330009460449 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.026070356369018555 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.02799534797668457 seconds ---
In [ ]:
# Build one train/test split per collision set (both recordings, recording 1,
# recording 5). Every split is built from the same normal features with
# full_normal=True; only the collision features differ.
(
    (X_train, y_train, X_test, y_test, df_test),
    (X_train_1, y_train_1, X_test_1, y_test_1, df_test_1),
    (X_train_5, y_train_5, X_test_5, y_test_5, df_test_5),
) = [
    get_train_test_data(df_features_normal, collision_features, full_normal=True)
    for collision_features in (
        df_features_collisions,
        df_features_collisions_1,
        df_features_collisions_5,
    )
]
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning:

X does not have valid feature names, but VarianceThreshold was fitted with feature names

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning:

X does not have valid feature names, but VarianceThreshold was fitted with feature names

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning:

X does not have valid feature names, but VarianceThreshold was fitted with feature names

Collisions¶

In [ ]:
# Load the collision information of recordings '1' and '5' (in that order).
(collisions_rec1, collisions_init1), (collisions_rec5, collisions_init5) = [
    get_collisions(recording_id, ROOTDIR_DATASET_ANOMALY)
    for recording_id in ('1', '5')
]

# Stack the per-recording frames so both recordings live in a single dataframe.
collisions_rec = pd.concat([collisions_rec1, collisions_rec5])
collisions_init = pd.concat([collisions_init1, collisions_init5])
In [ ]:
# Derive the collision zones and labels for each collision set:
# both recordings together, then recording 1 and recording 5 on their own.
(
    (collisions_zones, y_collisions),
    (collisions_zones_1, y_collisions_1),
    (collisions_zones_5, y_collisions_5),
) = [
    get_collisions_zones_and_labels(rec_frame, init_frame, feature_frame)
    for rec_frame, init_frame, feature_frame in (
        (collisions_rec, collisions_init, df_features_collisions),
        (collisions_rec1, collisions_init1, df_features_collisions_1),
        (collisions_rec5, collisions_init5, df_features_collisions_5),
    )
]

DAGMM for Anomaly Detection in Time Series Data¶

In [ ]:
from algorithms.dagmm import DAGMM

# All constructor arguments are collected in one mapping so the configuration
# can be read (or tweaked) in a single place before the model is built.
_dagmm_settings = dict(
    num_epochs=10,
    lambda_energy=0.1,
    lambda_cov_diag=0.005,
    lr=1e-4,
    batch_size=32,
    gmm_k=5,
    normal_percentile=80,
    sequence_length=30,
    # Using LSTM autoencoder
    autoencoder_type=DAGMM.AutoEncoder.LSTM,
    hidden_size=32,
    autoencoder_args={
        'n_layers': (4, 4),
        'use_bias': (True, True),
        'dropout': (0.1, 0.1)
    },
    seed=42,
    # Set to None for CPU, or specify GPU index if available
    gpu=None,
    details=True,
)
classifier = DAGMM(**_dagmm_settings)

# Train the DAGMM on normal data
classifier.fit(X_train)
print("DAGMM training completed.")
100%|██████████| 10/10 [00:37<00:00,  3.76s/it]
DAGMM training completed.

Predictions¶

In [ ]:
# Score every test set with the trained classifier using the "mad" threshold,
# in the order: both recordings, recording 1, recording 5.
# NOTE(review): each df_test* frame is both an argument and the target of the
# assignment, so re-running this cell feeds the previous result back in;
# confirm get_statistics tolerates that.
df_test, df_test_1, df_test_5 = [
    get_statistics(features, labels, classifier, frame, freq, threshold_type="mad")
    for features, labels, frame in (
        (X_test, y_collisions, df_test),
        (X_test_1, y_collisions_1, df_test_1),
        (X_test_5, y_collisions_5, df_test_5),
    )
]
Anomaly prediction completed.
Number of anomalies detected: 5 with threshold 25.04360360956374, std
Number of anomalies detected: 8 with threshold 22.712437048157057, mad
Number of anomalies detected: 16 with threshold 21.815066162745158, percentile
Number of anomalies detected: 0 with threshold 42.90918945046632, IQR
Number of anomalies detected: 169 with threshold 0.0, zero

choosen threshold type: mad, with value: 22.7124
F1 Score: 0.0177
Accuracy: 0.6373
Precision: 0.1250
Recall: 0.0095
              precision    recall  f1-score   support

           0       0.65      0.97      0.78       201
           1       0.12      0.01      0.02       105

    accuracy                           0.64       306
   macro avg       0.39      0.49      0.40       306
weighted avg       0.47      0.64      0.52       306

ROC AUC Score: 0.5367
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 8
Best threshold: -11.9408 | F1 Score: 0.5215 | Precision: 0.3633 | Recall: 0.9238
Anomalies detected with best threshold: 267

	-------------------------------------------------------------------------------------

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning:

invalid value encountered in divide

Anomaly prediction completed.
Number of anomalies detected: 0 with threshold 34.76196224936218, std
Number of anomalies detected: 0 with threshold 29.971444313062563, mad
Number of anomalies detected: 9 with threshold 27.627885636829195, percentile
Number of anomalies detected: 0 with threshold 61.252890429397425, IQR
Number of anomalies detected: 97 with threshold 0.0, zero

choosen threshold type: mad, with value: 29.9714
F1 Score: 0.0000
Accuracy: 0.7866
Precision: 0.0000
Recall: 0.0000
              precision    recall  f1-score   support

           0       0.79      1.00      0.88       129
           1       0.00      0.00      0.00        35

    accuracy                           0.79       164
   macro avg       0.39      0.50      0.44       164
weighted avg       0.62      0.79      0.69       164

ROC AUC Score: 0.6303
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 0
Best threshold: 14.1643 | F1 Score: 0.4301 | Precision: 0.3448 | Recall: 0.5714
Anomalies detected with best threshold: 58

	-------------------------------------------------------------------------------------

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning:

invalid value encountered in divide

Anomaly prediction completed.
Number of anomalies detected: 0 with threshold 12.892018650448753, std
Number of anomalies detected: 0 with threshold 8.626876203219096, mad
Number of anomalies detected: 8 with threshold 6.2170033405224485, percentile
Number of anomalies detected: 0 with threshold 25.798544782400132, IQR
Number of anomalies detected: 63 with threshold 0.0, zero

choosen threshold type: mad, with value: 8.6269
F1 Score: 0.0000
Accuracy: 0.6028
Precision: 0.0000
Recall: 0.0000
              precision    recall  f1-score   support

           0       0.60      1.00      0.75        85
           1       0.00      0.00      0.00        56

    accuracy                           0.60       141
   macro avg       0.30      0.50      0.38       141
weighted avg       0.36      0.60      0.45       141

ROC AUC Score: 0.5731
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1509: UndefinedMetricWarning:

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 0
Best threshold: -15.6667 | F1 Score: 0.6171 | Precision: 0.4538 | Recall: 0.9643
Anomalies detected with best threshold: 119

	-------------------------------------------------------------------------------------

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning:

invalid value encountered in divide

In [ ]:
# Compare the annotated collision zones with the predictions stored in df_test
# for both collision recordings together.
plot_anomalies_true_and_predicted(
    df_collisions_raw,
    df_collisions_raw_action,
    collisions_zones,
    df_test,
    title="Collisions zones vs predicted zones for both recordings",
)
In [ ]:
# Compare the annotated collision zones with the predictions stored in
# df_test_1 for recording 1 only.
plot_anomalies_true_and_predicted(
    df_collisions_raw_1,
    df_collisions_raw_action_1,
    collisions_zones_1,
    df_test_1,
    title="Collisions zones vs predicted zones for recording 1",
)
In [ ]:
# Compare the annotated collision zones with the predictions stored in
# df_test_5 for recording 5 only.
plot_anomalies_true_and_predicted(
    df_collisions_raw_5,
    df_collisions_raw_action_5,
    collisions_zones_5,
    df_test_5,
    title="Collisions zones vs predicted zones for recording 5",
)